
	**************** 
	** Clean UN Population projections (selected countries)
	****************

	** Current and past to merge with selected countries
		import excel using "$dir/rawdata/UN Population Prospects/WPP2022_POP_F03_1_POPULATION_SELECT_AGE_GROUPS_BOTH_SEXES.xlsx", clear firstrow sheet("Estimates") cellrange(A17:BP20541) case(lower)
		
		gen country="uga" if regionsubregioncountryorar=="Uganda"
		replace country="nga" if regionsubregioncountryorar=="Nigeria"
		replace country="tza" if regionsubregioncountryorar=="United Republic of Tanzania"
		replace country="mlw" if regionsubregioncountryorar=="Malawi"
		replace country="eth" if regionsubregioncountryorar=="Ethiopia"
		replace country="gha" if regionsubregioncountryorar=="Ghana"
		replace country="saf" if regionsubregioncountryorar=="South Africa"
		replace country="niger" if regionsubregioncountryorar=="Niger"
		
		keep if !missing(country)
		
		ren total pop_countryyear
		destring pop_countryyear, replace
		
		keep country year pop_countryyear
		bys year: egen pop_totalyear=total(pop_countryyear)
		
		save "$dir/data/population_selectcountries.dta", replace
		
	** Current and past within 5-year age bins to merge with selected countries
		import excel using "$dir/rawdata/UN Population Prospects/WPP2022_POP_F02_1_POPULATION_5-YEAR_AGE_GROUPS_BOTH_SEXES.xlsx", clear firstrow sheet("Estimates") cellrange(A17:AF20541) case(lower)
		
		gen country="uga" if regionsubregioncountryorar=="Uganda"
		replace country="nga" if regionsubregioncountryorar=="Nigeria"
		replace country="tza" if regionsubregioncountryorar=="United Republic of Tanzania"
		replace country="mlw" if regionsubregioncountryorar=="Malawi"
		replace country="eth" if regionsubregioncountryorar=="Ethiopia"
		replace country="gha" if regionsubregioncountryorar=="Ghana"
		replace country="saf" if regionsubregioncountryorar=="South Africa"
		replace country="niger" if regionsubregioncountryorar=="Niger"

		ren l agegroup_pop_0
		ren m agegroup_pop_5
		ren n agegroup_pop_10
		ren o agegroup_pop_15
		ren p agegroup_pop_20
		ren q agegroup_pop_25
		ren r agegroup_pop_30
		ren s agegroup_pop_35
		ren t agegroup_pop_40
		ren u agegroup_pop_45
		ren v agegroup_pop_50
		ren w agegroup_pop_55
		ren x agegroup_pop_60
		ren y agegroup_pop_65
		ren z agegroup_pop_70
		ren aa agegroup_pop_75
		
		foreach var in ab ac ad ae af {
			replace `var'="" if `var'=="..."
		}
		destring ab ac ad ae af, replace
		egen temp=rowtotal(ab ac ad ae af)
		ren temp agegroup_pop_80
		
		keep if !missing(country)
		destring agegroup_pop_*, replace
		
		gen agegroup10_pop_0=agegroup_pop_0+agegroup_pop_5
		gen agegroup10_pop_10=agegroup_pop_10+agegroup_pop_15
		gen agegroup10_pop_20=agegroup_pop_20+agegroup_pop_25
		gen agegroup10_pop_30=agegroup_pop_30+agegroup_pop_35
		gen agegroup10_pop_40=agegroup_pop_40+agegroup_pop_45
		gen agegroup10_pop_50=agegroup_pop_50+agegroup_pop_55
		gen agegroup10_pop_60=agegroup_pop_60+agegroup_pop_65
		gen agegroup10_pop_70=agegroup_pop_70+agegroup_pop_75
		gen agegroup10_pop_80=agegroup_pop_80
		
		keep country year agegroup_pop* agegroup10_pop*
		order country year agegroup_pop* agegroup10_pop*
		
		preserve
		keep country year agegroup_pop*
		reshape long agegroup_pop_, i(country year) j(age_group5)
		ren agegroup_pop_ pop_countryyear_5yr
		tempfile agegroup5
		save `agegroup5', replace
		save "$dir/data/population_5yr_selectcountries.dta", replace
		
		restore
		keep country year agegroup10*
		reshape long agegroup10_pop_, i(country year) j(age_group10)
		ren agegroup10_pop_ pop_countryyear_10yr
		save "$dir/data/population_10yr_selectcountries.dta", replace
		
	** Current estimates
		import excel using "$dir/rawdata/UN Population Prospects/WPP2022_POP_F02_1_POPULATION_5-YEAR_AGE_GROUPS_BOTH_SEXES.xlsx", clear firstrow sheet("Estimates") cellrange(A17:AF20541) case(lower)

		gen keep=1 if regionsubregioncountryorar=="Uganda"
		replace keep=1 if regionsubregioncountryorar=="Nigeria"
		replace keep=1 if regionsubregioncountryorar=="United Republic of Tanzania"
		replace keep=1 if regionsubregioncountryorar=="Malawi"
		replace keep=1 if regionsubregioncountryorar=="Ethiopia"
		replace keep=1 if regionsubregioncountryorar=="Ghana"
		replace keep=1 if regionsubregioncountryorar=="South Africa"
		replace keep=1 if regionsubregioncountryorar=="Niger"
		replace keep=1 if regionsubregioncountryorar=="Sub-Saharan Africa"
		replace keep=1 if regionsubregioncountryorar=="WORLD"
		
		keep if keep==1
		drop keep

		gen countryregion=regionsubregioncountryorar
		drop index variant regionsubregioncountryorar notes locationcode iso3alphacode iso2alphacode type sdmxcode parentcode
		order countryregion
		
		replace countryregion="World" if countryregion=="WORLD"
		replace countryregion="Tanzania" if countryregion=="United Republic of Tanzania"

		foreach var of varlist l-af {
		   local newvar: variable label `var'
		   local newvar = subinstr("`newvar'","-","to",.)
		   local newvar = subinstr("`newvar'","+","plus",.)
		   rename `var' pop_`newvar'
		}
		
		destring pop_*, replace
		
		** total poplation and population above certain ages
		egen pop_total=rowtotal(pop_*)
		egen pop_60plus=rowtotal(pop_60to64 pop_65to69 pop_70to74 pop_75to79 pop_80to84 pop_85to89 pop_90to94 pop_95to99 pop_100plus)
		egen pop_65plus=rowtotal(pop_65to69 pop_70to74 pop_75to79 pop_80to84 pop_85to89 pop_90to94 pop_95to99 pop_100plus)
		egen pop_70plus=rowtotal(pop_70to74 pop_75to79 pop_80to84 pop_85to89 pop_90to94 pop_95to99 pop_100plus)
		egen pop_75plus=rowtotal(pop_75to79 pop_80to84 pop_85to89 pop_90to94 pop_95to99 pop_100plus)
		
		** below 15 and 15-64 for dependency ratios
		egen pop_below15=rowtotal(pop_0to4 pop_5to9 pop_10to14)
		egen pop_15to59=rowtotal(pop_15to19 pop_20to24 pop_25to29 pop_30to34 pop_35to39 pop_40to44 pop_45to49 pop_50to54 pop_55to59)
		egen pop_15to64=rowtotal(pop_15to19 pop_20to24 pop_25to29 pop_30to34 pop_35to39 pop_40to44 pop_45to49 pop_50to54 pop_55to59 pop_60to64)
		
		** depedency ratio
		gen depratio_15to59=(pop_below15+pop_60plus)/pop_15to59
		gen depratio_15to64=(pop_below15+pop_65plus)/pop_15to64
		
		gen oldagedepratio_15to59=pop_60plus/pop_15to59
		gen oldagedepratio_15to64=pop_65plus/pop_15to64
		
		tempfile estimates
		save `estimates'
	
	** Projections
		import excel using "$dir/rawdata/UN Population Prospects/WPP2022_POP_F02_1_POPULATION_5-YEAR_AGE_GROUPS_BOTH_SEXES.xlsx", clear firstrow sheet("Medium variant") cellrange(A17:AF22536) case(lower)

		gen keep=1 if regionsubregioncountryorar=="Uganda"
		replace keep=1 if regionsubregioncountryorar=="Nigeria"
		replace keep=1 if regionsubregioncountryorar=="United Republic of Tanzania"
		replace keep=1 if regionsubregioncountryorar=="Malawi"
		replace keep=1 if regionsubregioncountryorar=="Ethiopia"
		replace keep=1 if regionsubregioncountryorar=="Ghana"
		replace keep=1 if regionsubregioncountryorar=="South Africa"
		replace keep=1 if regionsubregioncountryorar=="Niger"
		replace keep=1 if regionsubregioncountryorar=="Sub-Saharan Africa"
		replace keep=1 if regionsubregioncountryorar=="WORLD"

		keep if keep==1
		drop keep

		gen countryregion=regionsubregioncountryorar
		drop index variant regionsubregioncountryorar notes locationcode iso3alphacode iso2alphacode type sdmxcode parentcode
		order countryregion

		replace countryregion="World" if countryregion=="WORLD"
		replace countryregion="Tanzania" if countryregion=="United Republic of Tanzania"

		foreach var of varlist l-af {
		   local newvar: variable label `var'
		   local newvar = subinstr("`newvar'","-","to",.)
		   local newvar = subinstr("`newvar'","+","plus",.)
		   rename `var' pop_`newvar'
		}
		
		destring pop_*, replace
		
		** total poplation and population above certain ages
		egen pop_total=rowtotal(pop_*)
		egen pop_60plus=rowtotal(pop_60to64 pop_65to69 pop_70to74 pop_75to79 pop_80to84 pop_85to89 pop_90to94 pop_95to99 pop_100plus)
		egen pop_65plus=rowtotal(pop_65to69 pop_70to74 pop_75to79 pop_80to84 pop_85to89 pop_90to94 pop_95to99 pop_100plus)
		egen pop_70plus=rowtotal(pop_70to74 pop_75to79 pop_80to84 pop_85to89 pop_90to94 pop_95to99 pop_100plus)
		egen pop_75plus=rowtotal(pop_75to79 pop_80to84 pop_85to89 pop_90to94 pop_95to99 pop_100plus)
			
		** below 15 and 15-64 for dependency ratios
		egen pop_below15=rowtotal(pop_0to4 pop_5to9 pop_10to14)
		egen pop_15to59=rowtotal(pop_15to19 pop_20to24 pop_25to29 pop_30to34 pop_35to39 pop_40to44 pop_45to49 pop_50to54 pop_55to59)
		egen pop_15to64=rowtotal(pop_15to19 pop_20to24 pop_25to29 pop_30to34 pop_35to39 pop_40to44 pop_45to49 pop_50to54 pop_55to59 pop_60to64)
		
		** depedency ratio
		gen depratio_15to59=(pop_below15+pop_60plus)/pop_15to59
		gen depratio_15to64=(pop_below15+pop_65plus)/pop_15to64
		
		gen oldagedepratio_15to59=pop_60plus/pop_15to59
		gen oldagedepratio_15to64=pop_65plus/pop_15to64
		
	** Append projections and estimates
	
		append using `estimates'
		
		// tab year
		// years available are from 1950 to 2100
		// use 2020 as "today"
		// last 30 years would be 1990-2020
		// next 30 years would be 2020-2050
		// adding 2100
		
	** Reshape to one observation 
		
		local vars "pop_total pop_60plus pop_65plus pop_70plus pop_75plus pop_below15 pop_15to59 pop_15to64 depratio_15to59 depratio_15to64 oldagedepratio_15to59 oldagedepratio_15to64"
		local vars_pop "pop_total pop_60plus pop_65plus pop_70plus pop_75plus pop_below15 pop_15to59 pop_15to64"
		
		keep countryregion year `vars'

		keep if year==1990 | year==2020 | year==2050 | year==2100
		
		foreach var in `vars' {
			ren `var' `var'_
		}
		reshape wide `vars', i(countryregion) j(year)
		
	** Rest of world totals
	
		insobs 1 
		replace countryregion="Rest of world" if missing(countryregion)
		
		foreach var in `vars_pop' {
			
			foreach year of numlist 1990 2020 2050 2100 {
			
				sum `var'_`year' if countryregion=="World"
				replace `var'_`year'=`r(mean)' if countryregion=="Rest of world"
				sum `var'_`year' if countryregion=="Sub-Saharan Africa"
				replace `var'_`year'=`var'_`year'-`r(mean)' if countryregion=="Rest of world"
				
			}
			
		}
		
	** Share of SSA population at and above different ages
		ren pop_total* pop_55plus* // temp for the loop
		forvalues age=55(5)75 {	
			foreach year of numlist 1990 2020 2050 2100 {
				sum pop_`age'plus_`year' if countryregion=="Sub-Saharan Africa"
				gen sharessapop_`age'plus_`year'=pop_`age'plus_`year'/`r(mean)'
				replace sharessapop_`age'plus_`year'=. if countryregion=="Sub-Saharan Africa" | countryregion=="World" | countryregion=="Rest of world"
			}
		}
		
	** Growth rates
		forvalues age=55(5)75 {
			gen growth_pop_`age'plus_1990to2020=(pop_`age'plus_2020-pop_`age'plus_1990)/pop_`age'plus_1990*100
			gen growth_pop_`age'plus_2020to2050=(pop_`age'plus_2050-pop_`age'plus_2020)/pop_`age'plus_2020*100
			gen growth_pop_`age'plus_2050to2100=(pop_`age'plus_2100-pop_`age'plus_2050)/pop_`age'plus_2050*100
			gen growth_pop_`age'plus_2020to2100=(pop_`age'plus_2100-pop_`age'plus_2020)/pop_`age'plus_2020*100
		}
		ren *55plus* *total*
		
	** Save
		save "$dir/data/population_projections_selectcountries.dta", replace
	